# Explore topic 13c
#
# Topic 13c builds a confidence interval for a population
# mean in the situation where the population standard
# deviation is not known.

# Start by loading every helper function used below.
source("../gnrnd5.R")
source("../gnrnd4.R")
source("../pop_sd.R")
source("../assess_normality.R")
source("../ci_unknown.R")

# Generate a population. The generated values are read
# back from L1 (presumably filled in by gnrnd5).
gnrnd5(145209499902, 3306001054)

# Keep the population under its own name, big_pop, since
# L1 is used again further down for the sample indices.
big_pop <- L1

# The size of the sample we are going to take.
samp_size <- 38

# So that everybody ends up with the same sample, the key
# handed to gnrnd4 is derived from the sample size; the
# values then found in L1 are used as index positions
# into big_pop.
key1 <- 702370001 + 100 * (samp_size - 1)
gnrnd4(key1, 500000001)
L1
this_sample <- big_pop[L1]

# Look at the sample.
this_sample

# Sample mean and sample standard deviation.
samp_mean <- mean(this_sample)
samp_sd <- sd(this_sample)
samp_mean
samp_sd

# Because the sample standard deviation stands in for the
# unknown population standard deviation, the distribution
# of the sample means is a Student's-t with
# (samp_size - 1) degrees of freedom.
# Next, build the confidence interval for a chosen
# confidence level.
conf_level <- 0.95

# The interval leaves out 1 - conf_level of the
# distribution; that amount is split evenly, one half in
# each tail. (t_over_2 holds that tail area, i.e. alpha/2.)
t_over_2 <- (1 - conf_level) / 2
t_over_2

# The confidence interval is
#   samp_mean +/- t(alpha/2) * samp_sd / sqrt(samp_size)
# so first get the two t-scores that cut off the tails.
t_score_low <- qt(t_over_2, df = samp_size - 1)
t_score_low
# Same tail area taken from the upper end: this is the
# opposite of the low value.
t_score_high <- qt(t_over_2, df = samp_size - 1, lower.tail = FALSE)
t_score_high

# Standard error of the sample mean.
st_error <- samp_sd / sqrt(samp_size)
st_error

# CI low value
samp_mean + t_score_low * st_error
# CI high value
samp_mean + t_score_high * st_error

##### Alternatively, find the margin of error first ...
MOE <- t_score_high * st_error
MOE
# ... and then the two limits of the confidence interval.
samp_mean - MOE # the low end
samp_mean + MOE # the high end

### Of course, all of this can be done in one step with
# our ci_unknown function.
ci_unknown(samp_sd, samp_size, samp_mean, conf_level)

##################################
# To try a different confidence level, change the value
# assigned to conf_level above and rerun the statements
# that follow it, or skip straight down to the
# ci_unknown() call to get the new values.
####################################

# If the confidence level is expressed as a percent, then
# that percent of the confidence intervals generated with
# this methodology will contain the true mean. So, at this
# point in the script, we do not know whether the 95%
# confidence interval generated above, namely
# (173.464, 213.299), does or does not contain the true
# mean.
#
# Find the true mean and see whether it is in the interval.
true_mean <- mean(big_pop)
true_mean
# yes it is!
# The work so far was a single illustration. Now repeat
# the whole process many times and count how many of the
# intervals generated this way contain the true mean.

# Reset the confidence level and the sample size here so
# that either one can be changed for a later rerun, and
# keep the number of repetitions in one place.
conf_level <- 0.95
samp_size <- 38
num_trials <- 10000

# One outcome label per trial. The vector is preallocated
# as character because it only ever holds the strings
# "hit" and "missed".
L3 <- character(num_trials)

for (i in seq_len(num_trials)) {
  # Draw a fresh random sample and build its interval.
  this_sample <- sample(big_pop, samp_size)
  this_ci <- ci_unknown(
    sd(this_sample), samp_size,
    mean(this_sample), conf_level
  )
  # this_ci[1] and this_ci[2] are used as the low and high
  # ends of the interval. Both comparisons are scalar, so
  # the short-circuit && is the right operator inside if().
  if (this_ci[1] <= true_mean && true_mean <= this_ci[2]) {
    L3[i] <- "hit"
  } else {
    L3[i] <- "missed"
  }
}

# See how we did.
table(L3)

#########
# To do this again, rerun this section; change the values
# assigned to conf_level, samp_size and/or num_trials
# above first if desired.